{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MLP MNIST with data augmentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"sequential_2\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "dense_4 (Dense)              (None, 256)               200960    \n",
      "_________________________________________________________________\n",
      "activation_4 (Activation)    (None, 256)               0         \n",
      "_________________________________________________________________\n",
      "dense_5 (Dense)              (None, 256)               65792     \n",
      "_________________________________________________________________\n",
      "activation_5 (Activation)    (None, 256)               0         \n",
      "_________________________________________________________________\n",
      "dense_6 (Dense)              (None, 10)                2570      \n",
      "_________________________________________________________________\n",
      "activation_6 (Activation)    (None, 10)                0         \n",
      "=================================================================\n",
      "Total params: 269,322\n",
      "Trainable params: 269,322\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "Using real-time data augmentation.\n",
      "Epoch 1/20\n",
      "Epoch 2/20\n",
      "Epoch 3/20\n",
      "Epoch 4/20\n",
      "Epoch 5/20\n",
      "Epoch 6/20\n",
      "Epoch 7/20\n",
      "Epoch 8/20\n",
      "Epoch 9/20\n",
      "Epoch 10/20\n",
      "Epoch 11/20\n",
      "Epoch 12/20\n",
      "Epoch 13/20\n",
      "Epoch 14/20\n",
      "Epoch 15/20\n",
      "Epoch 16/20\n",
      "Epoch 17/20\n",
      "Epoch 18/20\n",
      "Epoch 19/20\n",
      "Epoch 20/20\n",
      "10000/10000 [==============================] - 0s 17us/step\n",
      "Test loss: 0.06202767642540857\n",
      "Test accuracy: 98.1%\n"
     ]
    }
   ],
   "source": [
    "'''\n",
    "MLP network for MNIST digits classification w/ data augment\n",
    "Test accuracy: 97.7\n",
    "'''\n",
    "\n",
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "# numpy package\n",
    "import numpy as np\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Activation\n",
    "from keras.preprocessing.image import ImageDataGenerator\n",
    "from keras.datasets import mnist\n",
    "from keras.utils import to_categorical\n",
    "\n",
    "# load mnist dataset\n",
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "\n",
    "# compute the number of labels\n",
    "num_labels = np.amax(y_train) + 1\n",
    "# convert to one-hot vector\n",
    "y_train = to_categorical(y_train)\n",
    "y_test = to_categorical(y_test)\n",
    "\n",
    "# image dimensions (assumed square)\n",
    "image_size = x_train.shape[1]\n",
    "input_size = image_size * image_size\n",
    "# we train our network using float data\n",
    "x_train = x_train.astype('float32') / 255\n",
    "x_test = x_test.astype('float32') / 255\n",
    "\n",
    "# network parameters\n",
    "batch_size = 128\n",
    "hidden_units = 256\n",
    "data_augmentation = True\n",
    "epochs = 20\n",
    "max_batches = len(x_train) / batch_size\n",
    "\n",
    "# this is 3-layer MLP with ReLU after each layer\n",
    "model = Sequential()\n",
    "model.add(Dense(hidden_units, input_dim=input_size))\n",
    "model.add(Activation('relu'))\n",
    "model.add(Dense(hidden_units))\n",
    "model.add(Activation('relu'))\n",
    "model.add(Dense(num_labels))\n",
    "# this is the output for one-hot vector\n",
    "model.add(Activation('softmax'))\n",
    "model.summary()\n",
    "\n",
    "# loss function for one-hot vector\n",
    "# use of sgd optimizer\n",
    "# accuracy is good metric for classification tasks\n",
    "model.compile(loss='categorical_crossentropy',\n",
    "              optimizer='sgd',\n",
    "              metrics=['accuracy'])\n",
    "\n",
    "# validate the model on test dataset to determine generalization\n",
    "# score = model.evaluate(x_test, y_test, batch_size=batch_size)\n",
    "# print(\"\\nTest accuracy: %.1f%%\" % (100.0 * score[1]))\n",
    "\n",
    "# Run training, with or without data augmentation.\n",
    "if not data_augmentation:\n",
    "    print('Not using data augmentation.')\n",
    "    # train the network no data augmentation\n",
    "    x_train = np.reshape(x_train, [-1, input_size])\n",
    "    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)\n",
    "else:\n",
    "    print('Using real-time data augmentation.')\n",
    "    # This will do preprocessing and realtime data augmentation:\n",
    "    # we need [width, height, channel] dim for data aug\n",
    "    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])\n",
    "    datagen = ImageDataGenerator(\n",
    "        featurewise_center=False,  # set input mean to 0 over the dataset\n",
    "        samplewise_center=False,  # set each sample mean to 0\n",
    "        featurewise_std_normalization=False,  # divide inputs by std of dataset\n",
    "        samplewise_std_normalization=False,  # divide each input by its std\n",
    "        zca_whitening=False,  # apply ZCA whitening\n",
    "        rotation_range=5.0,  # randomly rotate images in the range (deg 0 to 180)\n",
    "        width_shift_range=2.0,  # randomly shift images horizontally\n",
    "        height_shift_range=2.0,  # randomly shift images vertically\n",
    "        horizontal_flip=False,  # randomly flip images\n",
    "        vertical_flip=False)  # randomly flip images\n",
    "\n",
    "    # Compute quantities required for featurewise normalization\n",
    "    # (std, mean, and principal components if ZCA whitening is applied).\n",
    "    datagen.fit(x_train)\n",
    "    for e in range(epochs):\n",
    "        batches = 0\n",
    "        for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=batch_size):\n",
    "            x_batch = np.reshape(x_batch, [-1, input_size])\n",
    "            model.fit(x_batch, y_batch, verbose=0)\n",
    "            batches += 1\n",
    "            \n",
    "            if batches >= max_batches:\n",
    "                # we need to break the loop by hand because\n",
    "                # the generator loops indefinitely\n",
    "                break\n",
    "        print(\"Epoch %d/%d\" % (e+1, epochs))\n",
    "\n",
    "# Score trained model.\n",
    "x_test = np.reshape(x_test, [-1, input_size])\n",
    "scores = model.evaluate(x_test, y_test, verbose=1)\n",
    "print('Test loss:', scores[0])\n",
    "print('Test accuracy: %0.1f%%' % (100 * scores[1]) )\n"
   ]
  },
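   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "The cell below is an optional sketch, not part of the original script: it previews a few digits produced by the same `ImageDataGenerator`, assuming the training cell above was run with `data_augmentation=True` so that `datagen` and the reshaped 4D `x_train` are still in memory."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# optional sketch: preview a few augmented digits\n",
     "# assumes the previous cell ran with data_augmentation=True,\n",
     "# so `datagen` exists and `x_train` has shape [-1, 28, 28, 1]\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "# draw a single batch of augmented images from the generator\n",
     "for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=16):\n",
     "    fig, axes = plt.subplots(2, 8, figsize=(12, 3))\n",
     "    for image, label, ax in zip(x_batch, y_batch, axes.flatten()):\n",
     "        ax.imshow(image.reshape(image_size, image_size), cmap='gray')\n",
     "        ax.set_title(int(np.argmax(label)))\n",
     "        ax.axis('off')\n",
     "    plt.show()\n",
     "    break  # one batch is enough for a preview\n"
    ]
   },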
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
